Experiment Summary loader

In [2]:
from IPython.display import Javascript, display
from ipywidgets import widgets

def run_all(ev):
    display(Javascript('IPython.notebook.execute_cells_below()'))

button = widgets.Button(description="Click to load experiment summary:")
button.on_click(run_all)
display(button)
In [3]:
# initializes dbconn and notebook settings
# imports relevant data from mysql db

import pandas as pd
import numpy as np
import sqlalchemy as sa
import glob
import os
import sys
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import datetime
%matplotlib inline  
#%pylab inline
pylab.rcParams['figure.figsize'] = (15, 25)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import seaborn as sns

import qgrid
qgrid.nbinstall(overwrite=True) 
qgrid.set_defaults(remote_js=True, show_toolbar=True)

import loaderplots as lp
from scipy.stats import stats, ttest_ind, ttest_rel, rankdata

#engine = sa.create_engine('mysql+pymysql://reeluser:R3el$olarX@localhost/rsi?charset=utf8')
engine = sa.create_engine('mysql+pymysql://reeluser:R3el$olarX@192.168.21.175/rsi?charset=utf8')
conn = engine.connect()

#cds, cells, exp, fullexp, fullprocess, ivt, ivtdfmod, mods, modtrends, process, sencera, trends, xrf 

#senceradf = pd.read_sql("sencera", conn, index_col="sencera_index")
#processdf = pd.read_sql("process", conn, index_col="process_index")
#expdf = pd.read_sql("exp", conn, index_col="exp_index")
#celldf = pd.read_sql("cells", conn, index_col="cells_index")
#moddf = pd.read_sql("mods", conn, index_col="mods_index")
#%time trends = pd.read_sql("trends", conn, index_col="trends_index")
#xrf = pd.read_sql("xrf", conn, index_col="xrf_index")
#ivtdf = pd.read_sql("ivt", conn, index_col="ivt_index")
#eddf = pd.read_sql("ed", conn, index_col="ed_index")

import time 
now = time.strftime("%c")
print ("Updated at: %s"  % now )

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
Updated at: Thu Aug  4 10:32:37 2016

To find experiments by topic:

  1. use the input box below to search by topic
  2. choose experiment ID from list
  3. input experiment ID in the box below the list
In [4]:
topic = (input("Enter experimental topic/description: "))

#cursor = conn.cursor()
#cursor.execute('SELECT DISTINCT `Experiment_ID`, `Description` FROM exp WHERE `Description` OR `Split1` OR `Split2` OR `Split3` LIKE "%{0}%";', (topic))
query = 'SELECT DISTINCT Experiment_ID, Description, Split1, Split2 FROM fullexp WHERE Description LIKE "%%{0}%%" OR Split1 LIKE "%%{0}%%" OR Split2 LIKE "%%{0}%%" OR Split3 LIKE "%%{0}%%";'
query = query.format(topic)

print ("\n")
print ("List of experiments involving %s updated at %s" % (topic, now) )

topicdf = pd.read_sql_query(query, conn)
qgrid.show_grid(topicdf, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})
Enter experimental topic/description: sencera


List of experiments involving sencera updated at Thu Aug  4 10:32:37 2016

click "export" on the table to save it for offline use when exporting as html.

To load data and output summary, input Experiment ID below:

In [5]:
expt = int(input("Enter experiment number: "))

#query = 'SELECT * from trends WHERE `Experiment_ID` = %d;' % expt
query = 'SELECT * from filttrends WHERE `Experiment_ID` = %d;' % expt


df = pd.read_sql_query(query, conn)
print ("Imported Expt. %d at %s" % (expt, now) )
#'{0} in {1}'.format(unicode(self.author,'utf-8'),unicode(self.publication,'utf-8'))
Enter experiment number: 715
Imported Expt. 715 at Thu Aug  4 10:32:37 2016
In [6]:
#pd.set_option('display.notebook_repr_html', True)

#def _repr_latex_(self):
#    return "\centering{%s}" % self.to_latex()

#pd.DataFrame._repr_latex_ = _repr_latex_  # monkey patch pandas DataFrame
module plots

Below, ranked module performance data.

In [7]:
#create module df
modsdf = df[df.Cell_Count > 19]

def mod_shunted(row):
    if row['Rsh_Ohm'] <=500.:
        return 1
    
modsdf.loc[:,'shunted'] = modsdf.apply(lambda row: mod_shunted(row), axis=1)

def mod_shunt(row):
    if row['Rsh'] <=50.:
        return 1
    
modsdf.loc[:,'shunt_count'] = modsdf.apply(lambda row: mod_shunt(row), axis=1)

#create nonshunted module df
nonshuntedmods = modsdf[modsdf.loc[:,'shunted'] != 1]

modsdflight = modsdf[~modsdf['Measurement'].str.contains('.*Dark.*', na = False)]
nonshuntedlight = nonshuntedmods[~nonshuntedmods['Measurement'].str.contains('.*Dark.*', na = False)]

#flatui1 = ["#3498db","#e74c3c", "#2ecc71", "#34495e", "#95a5a6", "#9b59b6", "#3498db", "#e74c3c",]
#sns.set_palette(flatui1)

#lp.mod_por_plots(modsdf, '%s' %expt)

query = 'SELECT Split1, Split2, Split3 from fullexp WHERE `Experiment_ID` = %s;' % expt
dfs = pd.read_sql_query(query, conn)

splitexist = []
splitexist = (dfs.isnull().all())
splitdict = dict(splitexist)
splitdict

def removeit(l):
    d = dict(l)
    for i in d.copy():   #if not d[i] returns true (null) splits
        if d[i] == True:
            d.pop(i)
    return d

splitlist = removeit(splitexist)

modstrimmed =  modsdflight.groupby('Sample_ID', as_index=False).max()
nonshuntedtrimmed = nonshuntedlight.groupby('Sample_ID', as_index=False).max()
c:\anaconda3\lib\site-packages\pandas\core\indexing.py:288: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
c:\anaconda3\lib\site-packages\pandas\core\indexing.py:465: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
In [8]:
lp.rankedfiltered(splitlist, modstrimmed, '%s' %expt)
Module, Nonshunted module, or cell?  module
generating figure for Split2
<matplotlib.figure.Figure at 0x1441b77aeb8>
generating figure for Split3
generating figure for Split1

here is just a simple chart showing shunted modules for POR vs EXP

In [9]:
#lp.shunts(splitlist, modstrimmed, '%s' %expt)

we can quickly output experimental stats

In [10]:
grouped = modsdflight.groupby('POR', as_index=True)

#exp is 0

modstats = pd.DataFrame(grouped.describe())
#modstats = modstats.set_index(['POR'])
print("Expt %d module stats: " % expt)
#modstats = modstats.drop('trends_index', axis=1)
modstats = modstats.drop('Experiment_ID', axis=1)
modstats = modstats.drop('Cell_Area_mm2', axis=1)
modstats = modstats.drop('Cell_Count', axis=1)
modstats = modstats.drop('Cell_no', axis=1)
modstats
#qgrid.show_grid(modstats, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})
Expt 715 module stats: 
c:\anaconda3\lib\site-packages\numpy\lib\function_base.py:3823: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[10]:
CdS Thickness (nm) CdTe Thickness in Angstroms Efficiency FillFactor Isc_mA Jsc Pmp_mW Roc_Ohm Rs Rs_Ohm Rsc_Ohm Rsh Rsh_Ohm Temp_C Vmp_V Voc Voc_V filttrends_index percentEff percentFF shunt_count
POR
EXP count 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 1.0
mean 77.000000 20174.500000 0.077025 0.563622 105.650440 18.699193 957.422233 37.237700 1.692623 14.065193 3046.333924 138.469724 3788.469927 28.274700 10.071061 0.659202 14.502450 381459.500000 7.702500 56.362200 1.0
std 8.082904 3088.246590 0.048227 0.065186 35.854860 6.345993 599.458961 14.424770 0.655671 4.957567 2345.141892 106.597359 1244.203375 2.047207 2.842340 0.153397 3.374745 30.730007 4.822657 6.518604 NaN
min 70.000000 17500.000000 0.034835 0.502508 74.500680 13.185961 432.997739 23.525467 1.069339 9.525211 992.934107 45.133369 2658.733414 26.382989 7.444227 0.524318 11.534991 381424.000000 3.483500 50.250800 1.0
25% 70.000000 17500.000000 0.035474 0.509763 74.650813 13.212533 440.941140 25.406233 1.154829 10.820489 2029.890125 92.267733 3205.051174 26.569436 7.697947 0.527390 11.602587 381441.250000 3.547400 50.976275 NaN
50% 77.000000 20174.500000 0.077118 0.564032 105.486999 18.670265 958.577458 37.694372 1.713381 12.989656 2384.386736 108.381215 3466.255329 28.259980 10.107465 0.659699 14.513375 381459.500000 7.711800 56.403250 NaN
75% 84.000000 22849.000000 0.118669 0.617892 136.486626 24.156925 1475.058551 49.525838 2.251174 16.234361 3400.830535 154.583206 4049.674082 29.965244 12.480579 0.791511 17.413237 381477.750000 11.866900 61.789175 NaN
max 84.000000 22849.000000 0.119029 0.623915 137.127082 24.270280 1479.536274 50.036589 2.274390 20.756249 6423.628116 291.983096 5562.635637 30.195851 12.625088 0.793094 17.448059 381495.000000 11.902900 62.391500 1.0
POR count 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 80.000000 10.0
mean 77.687000 22145.375000 0.139399 0.682125 142.937768 25.298720 1732.724670 21.427455 0.973975 12.310883 7856.724786 357.123854 8399.901848 27.844663 13.698351 0.807106 17.756331 381409.100000 13.939865 68.212515 1.0
std 9.782795 684.457678 0.011056 0.040085 2.688434 0.475829 137.424972 2.406249 0.109375 1.383147 19843.236262 901.965285 12153.945026 1.837706 0.920554 0.026691 0.587198 599.918383 1.105590 4.008536 0.0
min 58.100000 20554.000000 0.103742 0.569525 136.885275 24.227482 1289.512285 17.161185 0.780054 9.986190 555.678808 25.258128 1056.034997 25.024370 10.777311 0.704308 15.494784 380416.000000 10.374200 56.952500 1.0
25% 66.525000 21744.250000 0.134268 0.660518 141.263542 25.002397 1668.955667 19.698584 0.895390 11.253302 1371.054361 62.320653 2356.493847 25.997069 13.208609 0.796855 17.530811 380889.750000 13.426800 66.051800 NaN
50% 84.000000 22174.000000 0.141685 0.690805 142.315059 25.188506 1761.143622 21.082278 0.958285 12.302093 2628.629541 119.483161 4289.056988 27.911659 13.836427 0.815347 17.937644 381363.500000 14.168500 69.080450 NaN
75% 84.000000 22631.750000 0.145759 0.704375 144.315316 25.542534 1811.780760 23.008398 1.045836 13.360754 4585.371311 208.425969 8137.948438 29.564993 14.383209 0.825670 18.164743 381933.250000 14.575850 70.437525 NaN
max 84.000000 23942.000000 0.159119 0.757296 150.905524 26.708942 1977.843824 27.996128 1.272551 15.470634 151096.103493 6868.004704 79333.766641 30.963682 15.127167 0.835377 18.378296 382407.000000 15.911900 75.729600 1.0

performance by CdTe tool

In [11]:
plt.figure(figsize=(9,6))
ax = sns.stripplot(x="CdTe Tool", y="percentEff", data=modstrimmed, hue= "Split1", jitter=True)
sns.violinplot(x="CdTe Tool", y="percentEff", data=modstrimmed, inner=None, color=".8", ax=ax)
plt.legend(loc="lower left", frameon=True, fontsize=10)
Out[11]:
<matplotlib.legend.Legend at 0x1441bfa3cf8>
In [12]:
lp.bycdtetool(splitlist, modstrimmed, '%s' %expt)
<matplotlib.figure.Figure at 0x1441c36fe80>

performance vs deposition order

This loader takes user input. In the example below, you can choose to plot by Split1, Split2, POR, CdTe Tool, CdS Tool, or almost anything, vs. CdTe deposition time

In [13]:
lp.bydepplots(modstrimmed)
This returns Eff, FF, Voc, Jsc, Rs and Rsh by CdTe deposition time. You can also choose 1 other variable to consider -- POR, CdTe Tool, CdS Tool, Split1, etc. Enter that variable here:  Split1
In [14]:
#lp.jointplots(modsdf, '%s' %expt)

cell plots

In [15]:
# create cells df

cellsdf = df[df.Cell_Count == 1]

def cell_shunted(row):
    if row['Rsh_Ohm'] <=500.:
        return 1
    
cellsdf['shunted'] = cellsdf.apply(lambda row: cell_shunted(row), axis=1)

def cell_shunt(row):
    if row['Rsh'] <=50.:
        return 1
    
cellsdf['shunt_count'] = cellsdf.apply(lambda row: cell_shunt(row), axis=1)

nscells = cellsdf[cellsdf.loc[:,'shunted'] != 1]

cellsdflight = cellsdf[~cellsdf['Measurement'].str.contains('.*Dark.*', na = False)]
nscellslight = nscells[~nscells['Measurement'].str.contains('.*Dark.*', na = False)]

cellgroup = cellsdflight.groupby('POR', as_index=False)
#nscellgroup = nscellslight.groupby('POR', as_index=False)

cellstrimmed =  cellsdflight.groupby(['Sample_ID','Cell_no'], as_index=False).max()
nscellstrimmed = nscellslight.groupby(['Sample_ID','Cell_no'], as_index=False).max()
lp.rankedfiltered(splitlist, cellstrimmed, '%s' %expt)
c:\anaconda3\lib\site-packages\ipykernel\__main__.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
c:\anaconda3\lib\site-packages\ipykernel\__main__.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Module, Nonshunted module, or cell?  cell
generating figure for Split2
<matplotlib.figure.Figure at 0x1441c484828>
generating figure for Split3
generating figure for Split1

nonshunted cell and module performance compared by Split

In [16]:
#nscells, cellsdflight, nscellslight, cellstrimmed, nscellstrimmed, cells df
#modsdf, modsdflight, modstrimmed, 

#setting up grouped median and mean dfs

#modmedian = modsdf.groupby('Split1', as_index=False).median()
#modmean = modsdf.mean()
#cellmedian = cellsdf.groupby('Split1', as_index=False).median()
#cellmean = cellsdf.mean()

sns.set_palette("coolwarm")
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(figsize=(13,13), nrows=2, ncols=2)
#plt.subplot(411)
b = sns.boxplot('CdTe Tool', 'percentEff', hue='Split1', data=nonshuntedtrimmed, notch=False, ax=ax1)
# bplot = sns.boxplot('Sample_ID', 'percentEff', hue='POR', data=df, notch=False)
#plt.subplot(412)
b1 = sns.boxplot('CdTe Tool', 'percentEff', hue='Split1', data=nscellstrimmed, notch=False, ax=ax2)
#plt.subplot(421)
b2 = sns.boxplot('CdTe Tool', 'percentEff', hue='Split2', data=nonshuntedtrimmed, notch=False, ax=ax3)
#plt.subplot(422)
b3 = sns.boxplot('CdTe Tool', 'percentEff', hue='Split2', data=nscellstrimmed, notch=False, ax=ax4)

module data

In [23]:
qgrid.show_grid(modstrimmed, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})

cell data

In [17]:
qgrid.show_grid(cellstrimmed, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})
In [18]:
lp.expbycell(cellsdf, '%s' %expt)

Extras

Supplementary modules:

(1) T-test module: takes user input (POR, Splits, CdTe tool, etc)
(2) Past 50 experiments trends
(3) Module inspector: user selects module of interest; outputs Eff by cell (table and figure) and EL image

(1) module t-testing

(1) input t-testing parameter when asked
(2) returns all permutations, sorted by significance
* typically uses modsdflight, but could use modstrimmed (may have higher errors)
In [19]:
#the following function returns all ttests within a variable (ie Splits in Split 1, POR vs EXP, CDTE 1 vs 2)
#if instead you prefer to choose which samples to test, use: lp.ttestbychoice(df)
#df of choice is modsflight
lp.ttestall(modsdflight)
Enter t-test grouping parameter: Split1, Split2, Split3, CdTe Tool, POR, CdS Tool, etc: 
Split1


Split1 = (0, 'POR metal anneal ')
Split1 = (1, 'Metal anneal in big tool_210C')
Split1 = (2, 'Metal anneal in big tool_220C')
Split1 = (3, 'BAND GRADING')
Split1 = (4, 'Denton 84nm (POR)')
Split1 = (5, 'Sencera CdS 60nm')
Split1 = (6, 'Sencera CdS 65nm')


In [20]:
flatui = ["#3498db", "#e74c3c", "#2ecc71", "#95a5a6", "#34495e", "#9b59b6"]
sns.set(style='ticks')
sns.set_palette(flatui)
sns.set_context("poster")

#uncomment for user input
#x_low = float(input("enter beginning of trends range: "))
#x_high = float(input("enter end of trends range: "))

#previous 50 experiments
x_low = int(df.Experiment_ID.unique() - 49.)
x_high = int(df.Experiment_ID.unique()) + 1


query = 'SELECT * FROM filttrends WHERE Experiment_ID BETWEEN {0} AND {1} AND Measurement LIKE "Light" AND Cell_Count > 20 ;'
query = query.format(x_low, x_high)
trendsdf = pd.read_sql_query(query, conn)

lp.portrends(trendsdf)

(3) sample inspection

(1) use the table below to choose module 
(2) input Sample ID when asked

module data

In [21]:
samps = []

query = 'SELECT DISTINCT `Sample_ID`, Description, percentEff, Split1, Split2, Split3 from filttrends WHERE `Cell_Count` > 20 AND `Experiment_ID` = %d AND Measurement LIKE "Light";' % expt
samps = pd.read_sql_query(query, conn)

samplist = samps["Sample_ID"].tolist()
samps2 = samps.groupby('Sample_ID', as_index=False).max()
qgrid.show_grid(samps2, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})
In [22]:
sample = (input("Enter sample ID: ")).upper()

#cursor = conn.cursor()
#cursor.execute('SELECT DISTINCT `Experiment_ID`, `Description` FROM exp WHERE `Description` OR `Split1` OR `Split2` OR `Split3` LIKE "%{0}%";', (topic))
query = 'SELECT * FROM filttrends WHERE Sample_ID LIKE "%%{0}%%" AND Cell_Count = 1 AND Measurement LIKE "Light";'
query = query.format(sample)

sampledf = pd.read_sql_query(query, conn)
qgrid.show_grid(sampledf, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})

sns.set_style("darkgrid")
fg = sns.FacetGrid(sampledf, hue='POR', size= 6, aspect=2, )#, hue_kws=dict(marker=["^", "v"]))
fg.map(plt.scatter, 'Cell_no', 'percentEff', alpha=1,s=50)
#plt.xlim(570,700)
plt.ylim(0, 18)
plt.axhline(16.7, color='r', linestyle='-.')
pins = [1, 4, 7, 10, 13, 16, 19, 22]
for xc in pins:
    plt.axvline(x=xc, color = 'c', linestyle = ':', lw=2)
#plt.axvline(1, color='r', linestyle='-.')
plt.grid(b=True, which='both', color='0.65',linestyle='-', )
#plt.legend()
plt.tick_params(axis='both',which='minor')
plt.minorticks_on()
plt.title('%s eff by cell' % sample)

eldate = str(sampledf["Date_tm"].unique())
eldate = eldate[2:12]
#eldate = eldate[7:11]+"-0"+eldate[2:3]+"-"+eldate[4:6]
#print(eldate)

from IPython.display import Image
PATH = '//rsi-el/Users/Public/EL/'
#filename =PATH + "%s/%s.jpg" % (eldate,sample)
#print(filename)
Image(filename = PATH + "%s/%s.jpg" % (eldate,sample), width=700, height=700)
Enter sample ID: 52j4c
Out[22]:
In [ ]: